using DataFrames
using QuadGK
using Distributions
using JLD
using Optim
using ForwardDiff
using CSV
using LinearAlgebra
using SpecialFunctions
using Random
using DelimitedFiles
using SparseArrays

clearconsole()

#-------------------------------------------------------------
# include Functions
#-------------------------------------------------------------
#cd("$(pwd())/Dropbox/Heterogeneity/Software/KS_Simulation/")
readDir = "$(pwd())/Functions/"
include(readDir *"logSpline_Procedures.jl");

#-------------------------------------------------------------
# choose specification file
#-------------------------------------------------------------
nfVARSpec = "3"
specDir   = "$(pwd())/SpecFiles/"
include(specDir * "/fVARspec" * nfVARSpec * ".jl")

#-------------------------------------------------------------
# load data
#-------------------------------------------------------------
nDataSel = "3"
dataDir  = "$(pwd())/data/Para" * nDataSel *"/"

agg_data    = readdlm(dataDir * "aggvar_sim.csv", ',', Float64, '\n'; skipstart=0);
densdraws   = readdlm(dataDir * "a_cross_sim.csv", ',', Float64, '\n'; skipstart=0)';
employdraws = readdlm(dataDir * "empl_cross_sim.csv", ',', Float64, '\n'; skipstart=0)';
ss_K        = readdlm(dataDir * "ss_K.csv", ',', Float64, '\n'; skipstart=0);
K_exact     = agg_data[:,2] .+ ss_K;

# subsequently use the same knots regardless of sample size N,T
#knots_all = quantile(densdraws[employdraws.==1], quant_vec)
knots_all = quantile(densdraws[employdraws.==1]/mean(K_exact), quant_vec)

#-------------------------------------------------------------
# log spline density estimation over K and t
#-------------------------------------------------------------
MDD_GoF_sum  = zeros(K_vec_n, 3);

try mkdir("$(pwd())/results/") catch; end;
try mkdir("$(pwd())/results/Para" *nDataSel * "/") catch; end;
global sNameDir  = "fVAR" * nfVARSpec
global savedir   = "$(pwd())/results/Para" *nDataSel * "/" * sNameDir *"/";
try mkdir(savedir) catch; end

for ii = 1:K_vec_n
    K             = K_vec[ii]
    knots         = knots_all[quant_sel[ii,:].==1]
    PhatDensValue = zeros(T, length(xgrid));
    PhatDensCoef  = zeros(T, K);
    PhatDensNorm  = zeros(T, 1);
    PhatlogLike   = zeros(T, 1);
    Vinv_all      = zeros(K*T, K);
    N_all         = zeros(T, 1);
    Period_all    = zeros(T, 1);
    MeanAssets_all= zeros(T, 1);
    N_details     = zeros(T, 4+K);

    for tt = 1:T
        time_init_loop = time_ns()

        # time t data
        densdraws_t     = densdraws[1:N,tt]./K_exact[tt]
        employdraws_t   = employdraws[1:N,tt]
        selecteddraws_t = densdraws_t[employdraws_t.==1]
        N_all[tt]       = length(selecteddraws_t)
        MeanAssets_all[tt] = mean(selecteddraws_t)

        # count observations with knot restriction
        # recall that there are K-1 knots
        N_knots      = zeros(1,K);
        N_knots[1,1] = sum(selecteddraws_t.<=knots[1])
        for kk = 2:(K-1)
            N_knots[1,kk] = sum((selecteddraws_t.>knots[kk-1]) .& (selecteddraws_t.<=knots[kk]))
        end
        N_knots[1,K] = sum(selecteddraws_t.>knots[end])
        println("Number of obs in knot brackets: $(N_knots)")
        println("Max knot:        $(knots[K-1])")

        # compute MLE
        if tt == 1
            alpha_initial = zeros(K)
        else
            alpha_initial = PhatDensCoef[tt-1,:]
        end
        # alpha_initial = zeros(K)

        if TopCodeFlag == 0
            f1(x)  = logspline_obj(selecteddraws_t, x, knots, minimum(xgrid), maximum(xgrid)) # without top-coding
            td     = TwiceDifferentiable(f1, alpha_initial; autodiff  = :forward )
            results_t = try
                optimize(td, alpha_initial, Newton())
            catch
                0 #optimize(f1, alpha_initial, BFGS()) #optimize(td, zeros(K), Newton())
            end
            if results_t == 0
                results_t = try
                    optimize(td, zeros(K), Newton())
                catch
                    optimize(f1, zeros(K), BFGS()) #optimize(td, zeros(K), Newton())
                end
            end

        else
            f2(x)  = logspline_obj_topcode(selecteddraws_t, x, knots, minimum(xgrid), maximum(xgrid)) # with top-coding
            td     = TwiceDifferentiable(f2, alpha_initial; autodiff  = :forward )
            results_t = try
                optimize(td, alpha_initial, Newton())
            catch
                optimize(f2, alpha_initial, BFGS())
            end
        end

        coef_t = results_t.minimizer
        C_topcode = maximum(selecteddraws_t)
        N_max     = sum(selecteddraws_t.==C_topcode)

        if (TopCodeFlag == 0) | (N_max == 1)
            # likelihood w/o top coding
            PhatlogLike[tt] = - N_all[tt]*results_t.minimum
            pi_hat = 0
            println("No top coding")
        else
            # likelihood w top coding
            println("Top coded value is $(C_topcode), number of obs is $(N_max)")
            pi_hat = N_max/N_all[tt]
            PhatlogLike[tt] = - N_all[tt]*results_t.minimum + (N_all[tt]-N_max)*log(1-pi_hat) + N_max*log(pi_hat)
        end


        # results
        PhatDensCoef[tt,:]  = coef_t
        PhatDensNorm[tt,:]  = lnpdfNormalize(coef_t',knots, minimum(xgrid), maximum(xgrid))
        PhatDensValue[tt,:] = pdfEval(xgrid,coef_t,knots,[PhatDensNorm[tt,1]])';
        N_details[tt,:]     = [N_all[tt] N_max pi_hat C_topcode N_knots]

        # compute negative inverse hessian
        # changed type of hessian_sqrtK output to "Symmetric"
        # note that V_t type is also Symmetric

        if (TopCodeFlag == 0) | (N_max == 1)
            # Hessian w/o top coding
            Hess_t = hessian_loglh(PhatDensCoef[tt,:], knots, minimum(xgrid), maximum(xgrid))
        else
            # Hessian w top coding
            Hess_t = (N_all[tt]-N_max)/N_all[tt]*hessian_loglh(PhatDensCoef[tt,:], knots, minimum(xgrid), C_topcode)
        end

        Vinv_t = - Hess_t
        if isposdef(Vinv_t) == false
            Vinv_eig = eigen(Vinv_t)
            Vinv_t = Symmetric(Vinv_eig.vectors*Diagonal(abs.(Vinv_eig.values))*Vinv_eig.vectors')
            println("Flipped neg eigenvalues")
        end
        Vinv_all[K*(tt-1)+1:K*tt,:] = Vinv_t

        println("K = $(K), Period $(tt)")
        println("Vinv_t is positive definite: $(isposdef(Vinv_t))")
        time_loop=signed(time_ns()-time_init_loop)/1000000000
        println("Elapsed Time $(time_loop) seconds")

    end

    # compress coefficients, ~ = PhatDensCoef_mean
    (PhatDensCoef_factor, PhatDensCoef_lambda, PhatDensCoef_mean ) = coefCompress(PhatDensCoef)
    Ktilde = size(PhatDensCoef_factor)[2]
    println("----------------")
    println("Compression Step")
    println("K = $(K), K-tilde = $(Ktilde)")
    println("----------------")
    println("")

    # Goodness of Fit (GoF) is log likelihood
    MDD_GoF   = zeros(T, 1);
    for tt = 1:T
        MDD_GoF[tt] = PhatlogLike[tt]
    end

    MDD_GoF_sum[ii,:] = [K Ktilde sum(MDD_GoF)]

    # save results
    sNameFile = "K" * string(K) * "_fVAR" * nfVARSpec
    CSV.write(savedir * sNameFile * "_PhatDensValue.csv", DataFrame(PhatDensValue,:auto))
    CSV.write(savedir * sNameFile * "_PhatDensCoef.csv", DataFrame(PhatDensCoef,:auto))
    CSV.write(savedir * sNameFile * "_PhatDensCoef_factor.csv", DataFrame(PhatDensCoef_factor,:auto))
    CSV.write(savedir * sNameFile * "_PhatDensCoef_lambda.csv", DataFrame(PhatDensCoef_lambda,:auto))
    CSV.write(savedir * sNameFile * "_PhatDensCoef_mean.csv", DataFrame(PhatDensCoef_mean,:auto))
    CSV.write(savedir * sNameFile * "_Vinv_all.csv", DataFrame(Vinv_all,:auto))
    CSV.write(savedir * sNameFile * "_N_all.csv", DataFrame(N_all,:auto))
    CSV.write(savedir * sNameFile * "_MeanAssets_all.csv", DataFrame(MeanAssets_all,:auto))
    CSV.write(savedir * sNameFile * "_N_details.csv", DataFrame(N_details,:auto))
    CSV.write(savedir * sNameFile * "_MDD_GoF.csv", DataFrame(MDD_GoF,:auto))

end

sNameFile = "fVAR" * nfVARSpec;
CSV.write(savedir * sNameFile * "_MDD_GoF_sum.csv", DataFrame(MDD_GoF_sum,:auto))
CSV.write(savedir * sNameFile * "_knots_all.csv", DataFrame(knots_all',:auto))

# evaluate and save true density
# measureCoef_2_1 = Array(simul_data[:,8])
# measureCoef_2_2 = Array(simul_data[:,9])
# measureCoef_2_3 = Array(simul_data[:,10]);
# moment_2_1 = Array(simul_data[:,16])
# moment_2_2 = Array(simul_data[:,17])
# moment_2_3 = Array(simul_data[:,18]);
# aggEmployment = 0.93 # from Winberry's calibration
#
# PDensCoef = [measureCoef_2_1 measureCoef_2_2 measureCoef_2_3]
# PDensValue = pdfWinberry(PDensCoef, moment_2_1, moment_2_2, moment_2_3, xgrid, aggEmployment)
# CSV.write("$(pwd())/results/" *  "PDensValue.csv", DataFrame(PDensValue,:auto))
